{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第二节：数据探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       ".dataframe td,.dataframe tr,.dataframe thead th { \n",
       "    note:'pandas表格属性';\n",
       "    white-space: auto;\n",
       "    text-align:left;\n",
       "    border:1px solid;\n",
       "    font-size:12px\n",
       "}\n",
       ".input_prompt{\n",
       "    note:'隐藏cell左边的提示如 In[12]以便于截图';\n",
       "#     display:none;\n",
       "}\n",
       "div.output_text {\n",
       "    note:'输出内容的高度';\n",
       "    max-height: 500px;\n",
       "}\n",
       "div.output_area img{\n",
       "    note:'输出图片的宽度';\n",
       "    max-width:100%\n",
       "}\n",
       "div.output_scroll{\n",
       "    note:'禁用输出的阴影';\n",
       "    box-shadow: none;\n",
       "}\n",
       "</style>\n",
       "<h5>!!以上是作者为了排版而修改的排版效果，请注意是否需要使用!!</h5>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%html\n",
    "<style>\n",
    "/* pandas DataFrame table cells */\n",
    ".dataframe td, .dataframe tr, .dataframe thead th {\n",
    "    /* the previous value `auto` is not valid for white-space and was ignored by browsers */\n",
    "    white-space: normal;\n",
    "    text-align: left;\n",
    "    border: 1px solid;\n",
    "    font-size: 12px;\n",
    "}\n",
    "/* Prompt on the left of each cell, e.g. In[12]; enable the rule below to hide it for screenshots */\n",
    ".input_prompt {\n",
    "    /* display: none; */\n",
    "}\n",
    "/* Maximum height of text output */\n",
    "div.output_text {\n",
    "    max-height: 500px;\n",
    "}\n",
    "/* Maximum width of output images */\n",
    "div.output_area img {\n",
    "    max-width: 100%;\n",
    "}\n",
    "/* Disable the shadow on scrolling output areas */\n",
    "div.output_scroll {\n",
    "    box-shadow: none;\n",
    "}\n",
    "</style>\n",
    "<h5>!!以上是作者为了排版而修改的排版效果，请注意是否需要使用!!</h5>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.1 训练集数据探索"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1.1 数据特征类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library first, then the third-party analysis / plotting stack\n",
    "import warnings\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Silence all warnings so they do not clutter the cell outputs\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "# Render matplotlib figures inline, directly below the cell that draws them\n",
    "%matplotlib inline\n",
    "\n",
    "# Load the training and test sets from the dataset directory\n",
    "path  = './dataset/'\n",
    "train = pd.read_csv(f'{path}security_train.csv')\n",
    "test  = pd.read_csv(f'{path}security_test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>label</th>\n",
       "      <th>api</th>\n",
       "      <th>tid</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>LdrLoadDll</td>\n",
       "      <td>2488</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>LdrGetProcedureAddress</td>\n",
       "      <td>2488</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>LdrGetProcedureAddress</td>\n",
       "      <td>2488</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>LdrGetProcedureAddress</td>\n",
       "      <td>2488</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>LdrGetProcedureAddress</td>\n",
       "      <td>2488</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   file_id  label                     api   tid  index\n",
       "0        1      5              LdrLoadDll  2488    0.0\n",
       "1        1      5  LdrGetProcedureAddress  2488    1.0\n",
       "2        1      5  LdrGetProcedureAddress  2488    2.0\n",
       "3        1      5  LdrGetProcedureAddress  2488    3.0\n",
       "4        1      5  LdrGetProcedureAddress  2488    4.0"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the training set\n",
    "train.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>label</th>\n",
       "      <th>tid</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>35952.000000</td>\n",
       "      <td>35952.000000</td>\n",
       "      <td>35952.000000</td>\n",
       "      <td>35951.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>5.142051</td>\n",
       "      <td>0.989152</td>\n",
       "      <td>2494.964564</td>\n",
       "      <td>2153.216267</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.547382</td>\n",
       "      <td>1.957361</td>\n",
       "      <td>129.979938</td>\n",
       "      <td>1537.349809</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>282.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2456.000000</td>\n",
       "      <td>722.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>5.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2500.000000</td>\n",
       "      <td>2004.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>7.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2596.000000</td>\n",
       "      <td>3502.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>9.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>2980.000000</td>\n",
       "      <td>5000.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            file_id         label           tid         index\n",
       "count  35952.000000  35952.000000  35952.000000  35951.000000\n",
       "mean       5.142051      0.989152   2494.964564   2153.216267\n",
       "std        2.547382      1.957361    129.979938   1537.349809\n",
       "min        1.000000      0.000000    282.000000      0.000000\n",
       "25%        4.000000      0.000000   2456.000000    722.000000\n",
       "50%        5.000000      0.000000   2500.000000   2004.000000\n",
       "75%        7.000000      0.000000   2596.000000   3502.000000\n",
       "max        9.000000      5.000000   2980.000000   5000.000000"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns\n",
    "train.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1.2 数据分布探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='tid'>"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAEGCAYAAABbzE8LAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAANb0lEQVR4nO3df2yc913A8fenMRtRTWnXlJJ5FbfVTKxrRmnMtD8YKkVlIUGqBAhpm2hgqtA6LWkifmij0eKCkTYGoiVCm0phNFCxwTZUqLqsQSJDQmqnc9Um+5HRa+epy8LoUqLVbdIpyZc/7nFzcc9nO/Xd5/Hl/ZJOuzz35Pr9+O55+/Hj2YlSCpKkwbsoewGSdKEywJKUxABLUhIDLElJDLAkJRlZzs7r1q0rjUajT0uRpOE0PT39vVLKFfO3LyvAjUaDZrO5cquSpAtARHyr23YvQUhSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCUxwJKUxABLUhIDLElJDLAkJTHAkpTEAEtSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCVZ1r8JJw3KrbfeyvHjxxkbG8teysvGx8fZtm1b9jI0RAywauno0aPMvvAi//NSPd6ia158LnsJGkL1eHdL3awZ4cRPbc5eBQBrDz+UvQQNIa8BS1ISAyxJSQywJCUxwJKUxABLUhIDLElJDLAkJTHAkpTEAEtSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCUxwJKUxABLUhIDLElJDLAkJTHAkpTEAEtSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCUxwJKUxABLUhIDLElJDLAkJTHAkpTEAEtSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCUxwJKUxABLUhIDrIHYs2cPe/bsyV7GBcWPef2NZC9AF4ZWq5W9hAuOH/P68wxYkpIYYElKYoAlKYkBlqQkBliSkhhgSUpigCUpiQGWpCQGWJKSGGBJSmKAJSmJAZakJAZYkpIYYElKYoAlKYkBlqQkBliSkhhgSUpigCUpiQGWpCQGWJKSGGBJSmKAJSmJAZakJAZYkpIYYElKYoAlKYkBlqQkBliSkhhgSUpigCUpiQGWpCQGWJKSGGBJSmKAJSmJAZakJAZYkpIYYElKMpAA33DDDS/f6mSl1rXQ8zSbTW688Uamp6dX9L8nDYNWq8WWLVt44IEHzjlO5j/earUAOHbsGNu3b6fVarF9+3aOHTvWl3V1HqfzbyvNM+A+mpyc5MyZM+zevTt7KVLtTE1N8cILL3DXXXd1PU7mHp+amgLgvvvu49ChQ0xNTXHo0CH27t2bsewV1fcAz/+sUZezv5Va10LP02w2mZ2dBWB2dra2HwcpQ6vVYmZmBoBSCtA+TubOgjsfn5mZodlssm/fPkopzMzMUEph3759K34WvNhxudLH7ciKPpteNjk5mb2EWjly5AgnTpzg9ttvX9L+J06cgNLnRS3DRSe/T6v1/JLXXwetVou1a9dmL6OrubPa+Xbv3s2DDz74isfnvprsdPr0afbu3cvOnTv7ts5+W/QMOCJ+JyKaEdF89tlnB7GmoTB39ivplebObuebO27mPz47O8upU6fO2Xbq1Cn279/fj+UNzKJnwKWUe4B7ACYmJmp0TlJvo6OjRrjD2NgYAHffffeS9t+yZQuzJ3/QzyUty5kfvoTxN1255PXXQZ3P1huNRtcIj46Odn18dHSUkydPnhPhkZERbrrppn4vta/8JlyfeAlCWtiuXbu6br/zzju7Pj45OclFF52bqzVr1nDLLbf0Z4ED0vcAHzhwoOefs6zUuhZ6nomJiZc/m4+Ojtb24yBlGB8fp9FoABARQPs42bhx4ysebzQaTExMsGnTJiKCRqNBRLBp0yYuv/zyFV3XYsflSh+3ngH30dxn7bnP6pLO2rVrFxdffDE7duzoepzMPT53Nrx161Y2bNjArl27
2LBhw6o/+wWIuf8LyFJMTEyUZrPZx+VoWM1dj1zuNeDZ63+zn8tasrWHH2LjKr0GvJrWPKwiYrqUMjF/u2fAkpTEAEtSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCUxwJKUxABLUhIDLElJDLAkJTHAkpTEAEtSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCUxwJKUxABLUhIDLElJDLAkJTHAkpTEAEtSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCUxwJKUxABLUhIDLElJDLAkJTHAkpTEAEtSkpHsBejCMD4+nr2EC44f8/ozwBqIbdu2ZS/hguPHvP68BCFJSQywJCUxwJKUxABLUhIDLElJDLAkJTHAkpTEAEtSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCUxwJKUxABLUhIDLElJDLAkJTHAkpTEAEtSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCUxwJKUxABLUhIDLElJDLAkJTHAkpTEAEtSEgMsSUkMsCQlMcCSlMQAS1ISAyxJSQywJCUxwJKUxABLUpKR7AVICzp9irWHH8peBQBrXnwOuDJ7GRoyBli1tH79eo4fP87YWF2idyXj4+PZi9CQMcCqpXvvvTd7CVLfeQ1YkpIYYElKYoAlKYkBlqQkBliSkhhgSUpigCUpiQGWpCQGWJKSGGBJSmKAJSmJAZakJAZYkpIYYElKYoAlKYkBlqQkBliSkhhgSUpigCUpiQGWpCRRSln6zhHPAt9a4u7rgO+dz6JqZljmgOGZxTnqZ1hm6dccP1FKuWL+xmUFeDkiollKmejLkw/QsMwBwzOLc9TPsMwy6Dm8BCFJSQywJCXpZ4Dv6eNzD9KwzAHDM4tz1M+wzDLQOfp2DViS1JuXICQpiQGWpCRLDnBEXBUR/xERX4uIr0bE7dX2j0fE4Yg4GBH/EhGXdvydD0dEKyK+ERHv6ti+qdrWiogPrehE5z/HH1czPB4RD0fE66vtERF/Wa31YERc3/FcWyPiyeq2dZBz9Jql4/HfjYgSEevqPEuP12QyIo5Ur8njEbG54+/U7r3Va5bqsW3VsfLViPjTOs/S4zX5TMfrMRMRj6/SOa6LiEeqOZoR8fZq+2CPkVLKkm7AeuD66v6PAP8NXAP8EjBSbf8Y8LHq/jXAE8BrgTcCTwFrqttTwJuA11T7XLPUdbzaW485LunYZzvwyer+ZuALQADvAB6ttr8OeLr638uq+5cNao5es1R/vgr4Iu0fnFlX51l6vCaTwO912b+W761FZvkF4N+B11aP/VidZ+n13urY58+Bj6zGOYCHgV/uOC4OZBwjSz4DLqUcLaU8Vt1/Hvg6MFZKebiUcqra7RHgDdX9m4FPl1JeKqV8E2gBb69urVLK06WUHwCfrvYdiB5zfL9jt4uBue9O3gzsLW2PAJdGxHrgXcD+UspzpZT/A/YDmwY1Byw8S/XwXwB/0DEH1HSWReboppbvLeg5y23AR0spL1WP/W+dZ1nsNYmIAH4D+MdVOkcBLql2+1HgOx1zDOwYOa9rwBHRAH4GeHTeQ++j/dkD2kM+0/HYt6ttC20fuPlzRMSfRMQzwHuBj1S71X4OOHeWiLgZOFJKeWLebrWfpct764PVl4J/GxGXVdtqPwe8YpY3A++MiEcj4ksR8bPVbrWfZYHj/Z3Ad0spT1Z/Xm1z7AA+Xh3vfwZ8uNptoHMsO8ARMQp8DtjRedYYEXcAp4D7X+2iBqHbHKWUO0opV9Ge4YOZ61uOzllovwZ/yNlPIKtGl9fkE8DVwHXAUdpf8q4KXWYZof3l6zuA3wf+qTqLrLWFjnfg3Zw9+629LnPcBuysjvedwN9krGtZAY6IH6I9xP2llM93bP8t4FeA95bqgglwhPZ1yDlvqLYttH1gFpqjw/3Ar1X3azsHdJ3latrX4J6IiJlqXY9FxI9T41m6vSallO+WUk6XUs4Af037y1l6rDd9Dljw/fVt
4PPVl7ZfBs7Q/sUvtZ2lx/E+Avwq8JmO3VfbHFuBufv/TNZ7axkXswPYC9w1b/sm4GvAFfO2v5VzL8o/TfuC/Eh1/42cvSj/1ld7MXsF5vjJjvvbgM9W97dw7kX5L5ezF+W/SfuC/GXV/dcNao5es8zbZ4az34Sr5Sw9XpP1Hfd30r7GWNv31iKzvB/4o+r+m2l/ORt1naXXe6s65r80b9uqmoP2teAbqvu/CExnHCPLGeTnaF+4Pgg8Xt02077Y/kzHtk92/J07aH8H9BtU33Gstm+m/d3Ip4A7BnyALDTH54CvVNv/jfY35uZewL+q1noImOh4rvdV87eA3x7kHL1mmbfPDGcDXMtZerwmf1+t8yDwr5wb5Nq9txaZ5TXAP1TvsceAG+s8S6/3FvB3wPu7/J1VM0e1fZr2J4RHgY0Zx4g/iixJSfxJOElKYoAlKYkBlqQkBliSkhhgSUpigLWqRMSlEfGB6v7rI+KzC+x3ICJW/T8SqeFmgLXaXAp8AKCU8p1Syq/nLkc6fyPZC5CW6aPA1dXvoX0SeEsp5dqIWAt8Cvhp4DCwNm+J0tIYYK02HwKuLaVcV/12qwer7bcBL5ZS3hIRb6P902ZSrXkJQsPi52n/qC+llIO0f/RUqjUDLElJDLBWm+dp/9My8/0n8B6AiLgWeNsgFyWdD68Ba1UppRyLiP+KiK/Q/pWCcz4BfCoivl5tn05ZoLQM/jY0SUriJQhJSmKAJSmJAZakJAZYkpIYYElKYoAlKYkBlqQk/w8fN12SFSqSDQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of the `tid` column, restricted to the first 10,000 rows of the training set\n",
    "sns.boxplot(x=train[\"tid\"].iloc[:10000])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "file_id       9\n",
       "label         3\n",
       "api         166\n",
       "tid          56\n",
       "index      5001\n",
       "dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of distinct values in each column (NaN is not counted by default)\n",
    "train.nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1.3 数据缺失值探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    35951.000000\n",
       "mean      2153.216267\n",
       "std       1537.349809\n",
       "min          0.000000\n",
       "25%        722.000000\n",
       "50%       2004.000000\n",
       "75%       3502.000000\n",
       "max       5000.000000\n",
       "Name: index, dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# describe() reports `count` over non-null entries only, so a count lower than the\n",
    "# number of rows in `train` means the `index` column contains missing values.\n",
    "# NOTE(review): this cell is identical to the first cell of section 2.1.4;\n",
    "# `train.isnull().sum()` would report missing values for every column directly - consider switching.\n",
    "train['index'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1.4 奇异值探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    35951.000000\n",
       "mean      2153.216267\n",
       "std       1537.349809\n",
       "min          0.000000\n",
       "25%        722.000000\n",
       "50%       2004.000000\n",
       "75%       3502.000000\n",
       "max       5000.000000\n",
       "Name: index, dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the value range of `index` (min, quartiles, max) to spot implausible values\n",
    "train['index'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    35952.000000\n",
       "mean      2494.964564\n",
       "std        129.979938\n",
       "min        282.000000\n",
       "25%       2456.000000\n",
       "50%       2500.000000\n",
       "75%       2596.000000\n",
       "max       2980.000000\n",
       "Name: tid, dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the value range of `tid` (min, quartiles, max) to spot implausible values\n",
    "train['tid'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1.5 标签分布探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    28350\n",
       "5     6786\n",
       "2      816\n",
       "Name: label, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows for each label value, most frequent label first\n",
    "train['label'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABeEAAAH+CAYAAAAbE8XCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAABcSAAAXEgFnn9JSAAAuyklEQVR4nO3de7BtZXku+OeNm4vcBCN4PKLYYohHFO+CEm+AaW+0KCSpmI4tmnTa1pY0cFKxGikUK4nWkVaLmFSOAvZJ5bQ2mhCC0YQyKEEMiSgY8ATEjop3QeTmBoxv/zHHsqdrz7X23rC+vfZm/35Vq8Ya3xjPHN9cVdZyP+vjm9XdAQAAAAAA1t7PrPcEAAAAAADg/koJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMsmG9J7Azq6pvJdkjydfWey4AAAAAAKzoEUnu7O5/t7XB6u4B82FLVNWtu+22294HH3zwek8FAAAAAIAV3HDDDbnrrrtu6+59tjZrJfz6+trBBx/8uGuuuWa95wEAAAAAwAoOPfTQXHvttfdqRxN7wgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgkA3rPQG4P3nU71603lMAVvGvf/CS9Z4CAAAAsJOxEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGOQ+l/BVtUdVHVdV76+qf6mqjVV1R1VdVVWnV9VeCzJnVFWv8vUHqzzvyKr6aFXdXFW3V9UVVfWqzczxwKo6t6q+Mc3vuqp6S1XtvkrmgVX11unejVP2nKp6+Nb9hAAAAAAA2FltWIPXeGWS/zx9/8Ukf5lknyTPSvKWJL9aVc/t7u8syF6W5EsLxj+76EFVdXySD2b2x4NPJflekqOTfKCqDuvuUxdkHpPk8iQPSfLPSS5N8rQkpyc5uqqO7u67lmV2T/KJJEck+WaSC5I8KsmJSV5aVUd095cX/jQAAAAAAGCyFiX8PUn+JMm7uvuLS4NV9bAkFyV5cpJ3ZVbWL/e+7j5vSx5SVQ9Ock6SByQ5vrs/Mo0/NMnfJzmlqv6quy9ZFj0vswL+Pd190pTZkORDSV6e5E1JzliWOS2zAv7yJL/Y3bdPuZOTvHOax/O2ZN4AAAAAAOy87vN2NN39ge7+rfkCfhr/ZpLXT6evqKpd7+OjfiOzFfYXLBXw03O+neR3ptNT5gNV9YwkRyb5ztw96e4fJXldZn9AeONUyi9ldk3yhun09UsF/JQ7K8nV
SZ5bVU+9j+8HAAAAAID7udEfzHrVdNwtyc/ex9d6yXQ8f8G1i5JsTHLMsn3elzIXLt9yZirvL02yX5JfmLt0ZJIHJbmhuz+34FlLzz9266YPAAAAAMDOZnQJ/+jpeE+SmxdcP6qq3lVVf1xVp21mdfkTp+OVyy90992Z7fe+e5JDtiSzbPyw+5gBAAAAAIBNrMWe8Ks5aTp+bPlK9MmvLzs/s6o+nOTV89vAVNU+ma1OT5IbV3jWjZl94OpBmW0ZkySP3IJMpsySe5NZVVVds8Klg7f0NQAAAAAA2PEMWwlfVS9O8trMVsG/ednlLyU5NcmhSfZK8ogkv5bk60mOT/Jflt2/19z3d67wyDum494LcqMzAAAAAACwiSEr4avqsUn+NEkl+Y/dfdX89e7+02WRO5L8WVX9XZIvJDmuqo7o7s+MmN+21t2HLhqfVsg/bhtPBwAAAACAbWTNV8JX1cOTfCyzDzw9q7vfvaXZ7v5mknOn0xfOXbp97vs9VojvOR1vW5AbnQEAAAAAgE2saQlfVQ9O8jeZ7Zd+bmZbzmyt66fjw5YGuvvWJD+YTg9cIbc0/pW5sa9uowwAAAAAAGxizUr4qtoryV9ntr3KR5L8Znf3vXip/abjHcvGl7a0ecqCZ++S5PFJNia5bksyy8avnhu7NxkAAAAAANjEmpTwVbVbkguSPCPJx5P8anf/2714nUry8un0ymWXL5qOJyyIvjTJ7kku7u6NCzLHTnOcf9ZDkzw7yfeTXDZ36bLMVt0fXFVPWvCspedfuPI7AQAAAACANSjhq+oBSf5rkqOSXJrkFd199yr3719Vr6+qvZeN75Xkj5IcnuRbma2mn/e+JLcmeVlVvWIud0CSd0yn75wPdPcVmZXqByR5+1xmQ5L3JtklyXu6+565zN1Jzp5O/7Cq9pzLnZzksCSf7O7PrvQeAQAAAAAgSTaswWu8If//6vXvJXnvbEH7Jk7t7u9l9sGmZyf5g6r6xyTfTLJ/Ztu8/GySW5Kc0N13zoe7++aqek2SDyU5v6ouSXJTkmOS7JvZh8BesuC5Jya5PMlJVXVUkmuTPD3Jo5N8OsnvL8i8bXrdZyW5vqouzWyf+8OTfDfJa1b7gQAAAAAAQLI2Jfx+c9+/fMW7kjMyK+lvymxV+hFJDsms6P63JP9vkvOS/J/d/fVFL9DdH66q5yQ5bcrvmlmpfnZ3f2CFzPVV9eQkb03ywmmOX01yZpLf6+67FmQ2VtXzk7wpySuTHJfk5ml+b+7uG1d5nwAAAAAAkGQNSvjuPiOzgn1L778tye/eh+ddluRFW5n5WmYr4rcm88Mkp09fAAAAAACw1dbkg1kBAAAAAIBNKeEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACD
KOEBAAAAAGAQJTwAAAAAAAxyn0v4qtqjqo6rqvdX1b9U1caquqOqrqqq06tqr1Wyr66qK6rq9qq6uao+WlXP2szzjpzuu3nKXVFVr9pM5sCqOreqvjHN77qqektV7b5K5oFV9dbp3o1T9pyqevjmfyoAAAAAALA2K+FfmeTPk7wmyb8l+csklyb575K8Jck/VtUBy0NV9a4k5yZ5fJKLk1yR5AVJPlVVxy16UFUdn+STSV6Y5OokH0vyc0k+UFX/aYXMY5J8Lsmrk9yU5IIkD0hyepKLq2q3BZndk3wiyZuT7DVlvpbkxCSfq6pHr/oTAQAAAACArE0Jf0+SP0nyuO5+XHf/cne/MMnPZ1Z+PzbJu+YDVXVMkpMyK8Wf2N3HTZnnZFbkn1tV+y7LPDjJOZkV6Cd09/O6+4Tp9b+U5JSqet6C+Z2X5CFJ3tPdT+juX5nm9udJjkzypgWZ05IckeTyJId096909+FJTkmy/zQPAAAAAABY1X0u4bv7A939W939xWXj30zy+un0FVW169zlk6fj27r7+rnM5Un+OMm+SV677FG/kWSfJBd090fmMt9O8jvT6Snzgap6RmZF+3fm7kl3/yjJ6zL7A8Ibq2rDXGbXJG+YTl/f3bfP5c7KbAX+c6vqqQt/IAAAAAAAMBn9waxXTcfdkvxsMttrPclR0/j5CzJLY8cuG3/JKpmLkmxMcsyyfd6XMhd2913zgam8vzTJfkl+Ye7SkUkelOSG7v7cVswPAAAAAAB+yugSfmnv9HuS3Dx9//OZlfLf7e4bF2SunI6HLRt/4rLrP9Hddyf55yS7JzlkSzKrPOveZAAAAAAAYBMbNn/LfXLSdPzY3Er0R07HRQV8uvuOqrolyX5VtXd331ZV+2S2On3F3DT+tCQHZbZlzGafNTd+0NzYvcmsqqquWeHSwVv6GgAAAAAA7HiGrYSvqhdntq/7PUnePHdpr+l45yrxO6bj3ssyq+WWZ7bkWWuVAQAAAACATQxZCV9Vj03yp0kqyX/s7qs2E7lf6+5DF41PK+Qft42nAwAAAADANrLmK+Gr6uFJPpbZB56e1d3vXnbL7dNxj1VeZs/peNuyzGq55ZktedZaZQAAAAAAYBNrWsJX1YOT/E1m+6Wfm+TUBbd9dToeuMJr7Jlk3yTf7+7bkqS7b03yg9Vyc+Nf2dJnrWEGAAAAAAA2sWYlfFXtleSvM9te5SNJfrO7e8Gt/5LkriT7T6vml3vKdLx62fhVy67PP3uXJI9PsjHJdVuSWeVZ9yYDAAAAAACbWJMSvqp2S3JBkmck+XiSX+3uf1t0b3f/MMknptNfWnDLCdPxwmXjFy27Pu+lSXZPcnF3b1yQOXaa4/ycH5rk2Um+n+SyuUuXZbbq/uCqetJWzA8AAAAAAH7KfS7hq+oBSf5rkqOSXJrkFd1992ZiZ03H06rq5+Ze65lJfivJLUnevyzzviS3JnlZVb1iLnNAkndMp++cD3T3FZmV6gckeftcZkOS9ybZJcl7uvueuczdSc6eTv9w2h5nKXdyksOSfLK7P7uZ9wgAAAAAwE5uwxq8xhuSvHz6/ntJ3ltVi+47tbu/lyTdfXFVvTvJSUk+X1V/m2TXJC9IUklO7O5b5sPdfXNVvSbJh5KcX1WXJLkpyTGZ7SF/VndfsuC5Jya5PMlJVXVUkmuTPD3Jo5N8OsnvL8i8bXrdZyW5vqouzWyf+8OTfDfJa1b9iQAAAAAAQNamhN9v7vuXr3hXckZmJX2SpLt/u6o+n1mJ/4Ikdye5OMmZ3f3pRS/Q3R+uquckOS3JEZkV99cmObu7P7BC5vqqenKStyZ54TTHryY5M8nvdfddCzIbq+r5Sd6U5JVJjktyc5Lzkry5u29c5X0CAAAAAECSNSjhu/uMzAr2e5M9L7Nie2sylyV50VZmvpbZivityfwwyenTFwAAAAAAbLU1+WBWAAAAAABgU0p4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQ
JTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDrEkJX1VPrarfraqPVNWNVdVV1avcf8bSPSt8/cEq2SOr6qNVdXNV3V5VV1TVqzYzvwOr6tyq+kZVbayq66rqLVW1+yqZB1bVW6d7N07Zc6rq4Vv2UwEAAAAAYGe3YY1e581JXnYvcpcl+dKC8c8uurmqjk/ywcz+ePCpJN9LcnSSD1TVYd196oLMY5JcnuQhSf45yaVJnpbk9CRHV9XR3X3XsszuST6R5Igk30xyQZJHJTkxyUur6oju/vJWv1sAAAAAAHYqa1XCX57k6iT/OH39a5LdtiD3vu4+b0seUFUPTnJOkgckOb67PzKNPzTJ3yc5par+qrsvWRY9L7MC/j3dfdKU2ZDkQ0lenuRNSc5YljktswL+8iS/2N23T7mTk7xzmsfztmTeAAAAAADsvNZkO5rufnt3n97dF3b3t9biNRf4jST7JLlgqYCfnv3tJL8znZ4yH6iqZyQ5Msl35u5Jd/8oyeuS3JPkjVMpv5TZNckbptPXLxXwU+6szP7Y8NyqeuravTUAAAAAAO6PdqQPZn3JdDx/wbWLkmxMcsyyfd6XMhcu33JmKu8vTbJfkl+Yu3RkkgcluaG7P7fgWUvPP3brpg8AAAAAwM5mvUv4o6rqXVX1x1V12mZWlz9xOl65/EJ3353Zfu+7JzlkSzLLxg+7jxkAAAAAANjEWu0Jf2/9+rLzM6vqw0lePb8NTFXtk9nq9CS5cYXXujGzD1w9KLMtY5LkkVuQyZRZcm8yq6qqa1a4dPCWvgYAAAAAADue9VoJ/6UkpyY5NMleSR6R5NeSfD3J8Un+y7L795r7/s4VXvOO6bj3gtzoDAAAAAAAbGJdVsJ3958uG7ojyZ9V1d8l+UKS46rqiO7+zLaf3drr7kMXjU8r5B+3jacDAAAAAMA2st57wv+U7v5mknOn0xfOXbp97vs9VojvOR1vW5AbnQEAAAAAgE1sVyX85Prp+LClge6+NckPptMDV8gtjX9lbuyr2ygDAAAAAACb2B5L+P2m4x3Lxq+ajk9ZHqiqXZI8PsnGJNdtSWbZ+NVzY/cmAwAAAAAAm9iuSviqqiQvn06vXHb5oul4woLoS5PsnuTi7t64IHNsVe227FkPTfLsJN9PctncpcsyW3V/cFU9acGzlp5/4crvBAAAAAAA1qGEr6r9q+r1VbX3svG9kvxRksOTfCvJR5ZF35fk1iQvq6pXzOUOSPKO6fSd84HuviKzUv2AJG+fy2xI8t4kuyR5T3ffM5e5O8nZ0+kfVtWec7mTkxyW5JPd/dmtfOsAAAAAAOxkNqzFi1TVS5K8eW5o12n8M3NjZ3b3RZl9sOnZSf6gqv4xyTeT7J/ZNi8/m+SWJCd0953zz+jum6vq
NUk+lOT8qrokyU1Jjkmyb5KzuvuSBdM7McnlSU6qqqOSXJvk6UkeneTTSX5/QeZt0+s+K8n1VXVpkoMy+wPBd5O8ZnM/EwAAAAAAWKuV8PtnVlAvfdU0Pj+2/zR2U2ar0j+b5JAkxyc5MrPV7+9M8vjunt8e5ie6+8NJnpPk40menOTFSb6U5NXdfcoKmeune8+b5vDyJD9OcmaSo7v7rgWZjUmeP91zZ5LjMivhz0vylO7+8uZ+IAAAAAAAsCYr4bv7vMwK6i2597Ykv3sfnnVZkhdtZeZrma2I35rMD5OcPn0BAAAAAMBW264+mBUAAAAAAO5PlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZZkxK+qp5aVb9bVR+pqhurqquqtyD36qq6oqpur6qbq+qjVfWszWSOnO67ecpdUVWv2kzmwKo6t6q+UVUbq+q6qnpLVe2+SuaBVfXW6d6NU/acqnr45t4XAAAAAAAkyYY1ep03J3nZ1gSq6l1JTkrywyR/k2T3JC9I8otVdUJ3/8WCzPFJPpjZHw8+leR7SY5O8oGqOqy7T12QeUySy5M8JMk/J7k0ydOSnJ7k6Ko6urvvWpbZPcknkhyR5JtJLkjyqCQnJnlpVR3R3V/emvcLAAAAAMDOZ622o7k8yZlJ/ockD0ty12o3V9UxmRXwNyV5Yncf190vTPKcJP+W5Nyq2ndZ5sFJzknygCQndPfzuvuEJI9N8qUkp1TV8xY87rzMCvj3dPcTuvtXkvx8kj9PcmSSNy3InJZZAX95kkO6+1e6+/AkpyTZf5oHAAAAAACsak1K+O5+e3ef3t0Xdve3tiBy8nR8W3dfP/c6lyf54yT7JnntssxvJNknyQXd/ZG5zLeT/M50esp8oKqekVnR/p25e9LdP0ryuiT3JHljVW2Yy+ya5A3T6eu7+/a53FlJrk7y3Kp66ha8TwAAAAAAdmLb/INZq+qBSY6aTs9fcMvS2LHLxl+ySuaiJBuTHLNsn/elzIXLt5yZyvtLk+yX5BfmLh2Z5EFJbujuz23F/AAAAAAA4Kds8xI+s61gdkvy3e6+ccH1K6fjYcvGn7js+k90992Z7fe+e5JDtiSzyrPuTQYAAAAAADaxVh/MujUeOR0XFfDp7juq6pYk+1XV3t19W1Xtk9nq9BVz0/jTkhyU2ZYxm33W3PhBWzq/FTKrqqprVrh08Ja+BgAAAAAAO571WAm/13S8c5V77piOey/LrJZbntmSZ61VBgAAAAAANrEeK+F3Ot196KLxaYX847bxdAAAAAAA2EbWYyX87dNxj1Xu2XM63rYss1pueWZLnrVWGQAAAAAA2MR6lPBfnY4HLrpYVXsm2TfJ97v7tiTp
7luT/GC13Nz4V7b0WWuYAQAAAACATaxHCf8vSe5Ksn9VPXzB9adMx6uXjV+17PpPVNUuSR6fZGOS67Yks8qz7k0GAAAAAAA2sc1L+O7+YZJPTKe/tOCWE6bjhcvGL1p2fd5Lk+ye5OLu3rggc2xV7TYfqKqHJnl2ku8nuWzu0mWZrbo/uKqetBXzAwAAAACAn7IeK+GT5KzpeFpV/dzSYFU9M8lvJbklyfuXZd6X5NYkL6uqV8xlDkjyjun0nfOB7r4is1L9gCRvn8tsSPLeJLskeU933zOXuTvJ2dPpH07b4yzlTk5yWJJPdvdnt+4tAwAAAACws9mwFi9SVS9J8ua5oV2n8c/MjZ3Z3RclSXdfXFXvTnJSks9X1d9OmRckqSQndvct88/o7pur6jVJPpTk/Kq6JMlNSY7JbA/5s7r7kgXTOzHJ5UlOqqqjklyb5OlJHp3k00l+f0HmbdPrPivJ9VV1aZKDkhye5LtJXrPZHwoAAAAAADu9tVoJv39mBfXSV03j82P7zwe6+7czK8i/mFn5/swkFyd5Tnf/xaKHdPeHkzwnyceTPDnJi5N8Kcmru/uUFTLXT/eeN83h5Ul+nOTMJEd3910LMhuTPH+6584kx2VWwp+X5Cnd/eVVfhYAAAAAAJBkjVbCd/d5mRXUw3PdfVmSF21l5muZFf5bk/lhktOnLwAAAAAA2GrrtSc8AAAAAADc7ynhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhAQAAAABgECU8AAAAAAAMooQHAAAAAIBBlPAAAAAAADCIEh4AAAAAAAZRwgMAAAAAwCAb1nsCAAAAANw7T/jAE9Z7CsAqvvA/fWG9p8B2wEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGCQdS3hq+qSqupVvl64Qu7VVXVFVd1eVTdX1Uer6lmbedaR0303T7krqupVm8kcWFXnVtU3qmpjVV1XVW+pqt3vy/sGAAAAAGDnsGG9JzD5cJLbF4x/fflAVb0ryUlJfpjkb5LsnuQFSX6xqk7o7r9YkDk+yQcz+6PDp5J8L8nRST5QVYd196kLMo9JcnmShyT55ySXJnlaktOTHF1VR3f3XVv9TgEAAAAA2GlsLyX8qd39r5u7qaqOyayAvynJM7v7+mn8mUkuSXJuVV3S3bfMZR6c5JwkD0hyfHd/ZBp/aJK/T3JKVf1Vd1+y7HHnZVbAv6e7T5oyG5J8KMnLk7wpyRn36t0CAAAAALBT2NH2hD95Or5tqYBPku6+PMkfJ9k3yWuXZX4jyT5JLlgq4KfMt5P8znR6ynygqp6R5Mgk35m7J939oySvS3JPkjdOpTwAAAAAACy0w5TwVfXAJEdNp+cvuGVp7Nhl4y9ZJXNRko1Jjlm2z/tS5sLlW85M5f2lSfZL8gtbNnsAAAAAAHZG20sJ/9qqem9VnV1Vb6yqRy645+eT7Jbku91944LrV07Hw5aNP3HZ9Z/o
7rsz2+999ySHbElmM88CAAAAAICf2F62Uzlt2fl/qqozu/vMubGlYn5RAZ/uvqOqbkmyX1Xt3d23VdU+SR60Wm4af1qSg5JcvSXPmhs/aIXrP6Wqrlnh0sFbkgcAAAAAYMe03ivhP5Xk1zMro/fIbLX7/5HkR0neWlUnzd2713S8c5XXu2M67r0ss1pueWZLnrUoAwAAAAAAP2VdV8J39+nLhq5L8ntV9U9JPp7kjKr6k+7+4baf3drp7kMXjU8r5B+3jacDAAAAAMA2st4r4Rfq7r9J8k9J9k1y+DR8+3TcY5XontPxtmWZ1XLLM1vyrEUZAAAAAAD4KdtlCT+5fjo+bDp+dToeuOjmqtozs9L++919W5J0961JfrBabm78K3Njqz5rhQwAAAAAAPyU7bmE3286Lu2//i9J7kqyf1U9fMH9T5mOVy8bv2rZ9Z+oql2SPD7Jxsy2wtlsZjPPAgAAAACAn9guS/iq2j/Js6fTK5Nk2hf+E9PYLy2InTAdL1w2ftGy6/NemmT3JBd398YFmWOrardlc3voNLfvJ7ls9XcCAAAAAMDObN1K+Kp6VlUdV1UPWDb+qCR/ntm+63/Z3TfOXT5rOp5WVT83l3lmkt9KckuS9y971PuS3JrkZVX1irnMAUneMZ2+cz7Q3VdkVrAfkOTtc5kNSd6bZJck7+nue7biLQMAAAAAsJPZsI7PPiTJuUm+VVVXZlagH5TkqZmtTr8myW/OB7r74qp6d5KTkny+qv42ya5JXpCkkpzY3bcsy9xcVa9J8qEk51fVJUluSnJMZnvIn9XdlyyY34lJLk9yUlUdleTaJE9P8ugkn07y+/fp3QMAAAAAcL+3ntvR/EOSP0ryjczK7V/ObH/2zyc5JcnTu/s7y0Pd/duZFeRfzKx8f2aSi5M8p7v/YtGDuvvDSZ6T5ONJnpzkxUm+lOTV3X3KCpnrp3vPS7J/kpcn+XGSM5Mc3d13bfU7BgAAAABgp7JuK+G7+4tJ/td7mT0vs3J8azKXJXnRVma+llnhDwAAAAAAW227/GBWAAAAAAC4P1DCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyyYb0nAAAAOeNB6z0DYDVn/GC9ZwAAsMOyEh4AAAAAAAZRwgMAAAAAwCBKeAAAAAAAGEQJDwAAAAAAgyjhV1FVD6yqt1bVdVW1saq+UVXnVNXD13tuAAAAAABs/5TwK6iq3ZN8Ismbk+yV5IIkX0tyYpLPVdWj13F6AAAAAADsAJTwKzstyRFJLk9ySHf/SncfnuSUJPsnOWc9JwcAAAAAwPZPCb9AVe2a5A3T6eu7+/ala919VpKrkzy3qp66HvMDAAAAAGDHoIRf7MgkD0pyQ3d/bsH186fjsdtuSgAAAAAA7GiU8Is9cTpeucL1pfHDtsFcAAAAAADY
QW1Y7wlspx45HW9c4frS+EFb8mJVdc0Klx57ww035NBDD92aubEd+8a3b9/8TcC6OfTCvdZ7CsBKvut3KGzX/h//ZoHt1Q233LDeUwBWceg7/A69v7jhhhuS5BH3JquEX2yppblzhet3TMe97+NzfnzXXXfdce21137tPr4OsPYOno7+H+39yLU3rfcMAHYKfofeH3332vWeAcDOwO/Q+6Frv+F36P3II7JyX7wqJfw20N3+5AU7mKX/gsX/fgFg6/gdCgD3jt+hcP9lT/jFlv576D1WuL7ndLxtG8wFAAAAAIAdlBJ+sa9OxwNXuL40/pVtMBcAAAAAAHZQSvjFrpqOT1nh+tL41dtgLgAAAAAA7KCU8ItdluQHSQ6uqictuH7CdLxwm80IAAAAAIAdjhJ+ge6+O8nZ0+kfVtXSHvCpqpOTHJbkk9392fWYHwAAAAAAO4bq7vWew3apqnZPckmSw5N8M8mlSQ6azr+b5Iju/vK6TRAAAAAAgO2eEn4VVfXAJG9K8sokj0hyc5KPJXlzd9+4nnMDAAAAAGD7p4QHAAAAAIBB7AkPAAAAAACDKOEBAAAAAGAQJTwAAAAAAAyihAcAAAAAgEGU8AAAAAAAMIgSHgAAAAAABlHCAwAAAADAIEp4AAAAAAAYRAkPAAAAAACDbFjvCQCst6p6cpJjkxyW5KAke0+XbkvylSRXJ7mwuz+3PjMEgO1TVW1I8rNJbu7uezZz74OT7NXdX90mkwOAHUxV7Zbk8CQPS3JHkiu7+xvrOytgLVR3r/ccANZFVT0qyTlJnrs0tMrtneSSJK/t7n8dOjEA2M5V1UOSvCvJK5LsluSeJH+d5PTu/sIKmXOT/Hp3WwgEwE6pqn4xyde7+5oF1/63JGck2XfZpQuS/M/d/b3hEwSGUcIDO6Wq+vdJrkxyQGYr3c+fzm/MbMVBkuyZ5MAkT0nyS0mekOTbSZ5qNQIAO6uq2jPJPyb5+Wz6B+y7k5za3WcvyJ2b5FXd/YDxswSA7U9V/TjJud392mXjpyV5S2a/V/8pyfVJ9kvy7Mz+XfqFJM/o7ru27YyBtWJPeGBndWZmBfzJ3f2k7n5bd3+0u6/u7humr6unsbd19xOTnJrkoUneuq4zB4D1dXKSxyb5fJJnZVYOPCHJ+5PskuTdVfWOdZsdAGzffuoP2FX1iCRvTvLDJP99dz+ju3+tu1+c5NFJPp3k8Un+l20+U2DNKOGBndULk/xDd79rSwPdfVaSf0jyolGTAoAdwPFJbk3y4u7+THf/sLuv6e7fzOwzVn6Q5JSq+s9VtdpWbwBAclxmf8R+W3f/7fyF7v5ukv8xyV1JfnnbTw1YK0p4YGf14CT/ei9yX5myALCzekyST3f3t5df6O6PZrY6/mtJXpPkg9OHtwIAix2S2WeQnb/o4vSZZJ9N8h+24ZyANaaEB3ZWX03y7KraY0sD073PzqxYAICd1QMyWwm/UHf/tyRHJvlvma2av6Cqdt9GcwOAHc1SN7favzO/ktn2b8AOSgkP7Kw+mOTfJ/l4VR22uZunez6e5N8l+bPBcwOA7dlXMtubdkXd/fUkv5DZh8u9MMnHkuwzfmoAsN3bq6oeufSV5KZp/GGrZPZN8v3hMwOGqe5e7zkAbHPTiry/S3J4Zv/p3w1JrkxyY5I7p9v2SHJgkqckOTizD9D5TJLn+1R6AHZWVfX+JK9O8h+6+7rN3Ltnkr9M8vzMft+mux8weo4AsD2qqh9n+n24wK939yYLvqrqZzL7d+rXu/vpI+cHjGN/RmCn1N0bq+p5mX0K/esz29/2MUuXp+P8h8n9IMnZmX1YjgIegJ3ZXyY5Mcn/nuR1q93Y3XdU1YuS/N+ZffCcFUAA7Mw+lZV/Fx6ywvixmf0X2R8ZMiNgm7ASHtjpVdUume1d+8Qkj0yy13Tp9sz2jr8qyWXdfc/6zBAAth9V9cAkr0xyT3f/X1uY+Zkkb0iyX3e/ZeT8AOD+pKqemVlB/w/T
564AOyAlPAAAAAAADOKDWQEAAAAAYBAlPAAAAAAADKKEBwAAAACAQZTwAAAAAAAwiBIeAAAAAAAGUcIDAAAAAMAgSngAAAAAABhECQ8AAAAAAIMo4QEAAAAAYBAlPAAAAAAADKKEBwAAAACAQZTwAAAAAAAwyP8HJMWOWb7+FSUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1800x600 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(figsize=(12,4),dpi=150)\n",
    "train['label'].value_counts().sort_index().plot(kind = 'bar')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:ylabel='label'>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAf8AAAHjCAYAAAAt5RZeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAABcSAAAXEgFnn9JSAAA0s0lEQVR4nO3deZhcVYH38d/pTtIJIWSBACGQXAhZQAgJARISFHx1FKgRBHFFLARlwNFR2byvyrzozDuW+row7uPo6OiMOqiE5SKLYRUUZEm4IBACFLsgSXens3S6u+q8f9yKNKGT9HJvnbt8P8/TTyddVbd+5NH+1bn33HOMtVYAAKA4WlwHAAAAzUX5AwBQMJQ/AAAFQ/kDAFAwlD8AAAVD+QMAUDCUPwAABUP5AwBQMJQ/AAAFQ/kDAFAwlD8AAAVD+QMAUDCUPwAABUP5AwBQMJQ/AAAFQ/kDAFAwlD8AAAVD+QMAUDCUPwAABUP5AwBQMJQ/AAAFQ/kDAFAwlD8AAAVD+QMAUDCUPwAABUP5AwBQMJQ/AAAFQ/kDAFAwlD8AAAVD+QMAUDCUPwAABUP5AwBQMJQ/AAAFQ/kDAFAwlD8AAAVD+QMAUDCUPwAABUP5AwBQMJQ/AAAFQ/kDAFAwlD8AAAVD+QMAUDCUPwAABUP5AwBQMJQ/AAAFQ/kDAFAwlD8AAAVD+QMAUDCUPwAABUP5AwBQMJQ/AAAFQ/kDAFAwlD8AAAVD+QMAUDCUPwAABUP5AwBQMJQ/AAAFQ/kDAFAwlD8AAAUzynUAAM3n+cEukiZK2m0HX/0fnyBptKSapHrje/8/90javJ2vdZKe3/pVrZQ2NOO/EcD2GWut6wwAYuT5gZE0TdIB23zNkrS/pKly+8F/g6IPAi/olQ8F2/75uWqltNFZQiDnKH8ggzw/GCNpjl5b8AdI8iSNcxYuPi9IeqDfVyjp4Wql1OM0FZADlD+Qco2R/GxJR0la3Pi+QNIYh7Fc6ZX0qF79oeCBaqX0nNNUQMZQ/kDKeH4wVa+U/GJJR0qa7DRU+q1VdGZglaTbJN1crZTa3UYC0ovyBxzy/GC0onJf3O/Lc5kpJ+qSVkpa0fi6vVopbXKaCEgRyh9oMs8PZko6vvH1vxTNpkeyeiTdpeiDwE2S/lCtlHrdRgLcofyBhDUm5x0r6URFhT/PbSJI2ijpdkUfBFZIWlmtlOpuIwHNQ/kDCfD8YKKisj9Z0glidJ92L0r6taRfSrq1WinVHOcBEkX5AzHx/GAfSacqKvxjFS2Kg+x5SdIVij4I3MwHAeQR5Q+MQOOU/kmSzpL0FkmtbhMhZi8r+hDw02qldIfrMEBcKH9gGDw/mK+o8E+XtIfjOGiOJyT9t6IPAo+6DgOMBOUPDJLnB5MlvU9R6R/uOA7cukfSjyX9uFopdbkOAwwV5Q/sgOcHLZLeLOmDkt4uaazTQEibLkn/Ielfq5XS467DAINF+QMD8PxghqQPSSpLmuE4DtKvLimQdFm1UlrhOgywM5Q/0I/nB6+T5Et6j9jyGsPzoKR/VTQ3YLPrMMBAKH9AkucHiyV9WtLbJBnHcZAPayX9m6RvsfEQ0obyR6F5fvA3kv63pDe6zoLc6pP0K0WXBH7vOgwgUf4ooMYkvlMUnd4/wnEcFMudkj5drZRudR0ExUb5ozAaO+i9X9LFYn19uHW9og8B97kOgmKi/JF7nh+MlXSOpAsl7ec4DrCVlXS5pEuqldJq12FQLJQ/cs3zg3dJ+qIkz3EUYHv6FK0V8DkmBqJZKH/kkucHiyR9XdIxjqMAg9Ut6ZuSvlCtlNa5DoN8o/yRK54fTJP0L4oW5+GWPWRRp6T/J+lr1Uppo+swyCfKH7nQuK5/vqLb9nZ1HAeIw4uS/lnSd9hWGHGj/JF5nh+8U9KXxHV95NP9kj5crZTudR0E+UH5I7M8Pzhc0XX91zuOAiStpmjJ4Eu4FIA4UP7IHM8P9pRUUXRd
v8VxHKCZnpJ0XrVS+o3rIMg2yh+Z4vnB6ZIuk7S76yyAQ7+Q9PFqpfSi6yDIJsofmdCYxf9dSSe5zgKkRLui1Sp/UK2U+EWOIaH8kXqeH5QlfU3SZNdZgBS6VdLfVSulR10HQXZQ/kitxmj/3yWd6DoLkHJbJP1fSZVqpdTrOgzSj/JHKnl+cJqk70ma4joLkCEPSnpvtVJ60HUQpBvlj1Tx/GA3RUucnuE6C5BR3ZIuqFZK33YdBOlF+SM1PD94g6T/lDTTdRYgB5ZLOpt9AjAQyh/OeX4wStEypheJ+/aBOD0j6f3VSuk210GQLpQ/nPL8YKqk/5F0nOMoQF7VJH1e0j9XK6W66zBIB8ofzjS23f21pBmuswAFcL2k06uV0lrXQeAep1jhhOcHZ0j6nSh+oFneKul+zw8Wuw4C9xj5o6ka1/e/KuljrrMABdUj6cJqpfQN10HgDuWPpmlc379c0rGuswDQzyWdVa2UNrsOguaj/NEUjev7V0jaz3UWAH91l6S3VSulv7gOgubimj8S11ib/3ei+IG0WSzp954fzHYdBM3FyB+J4fo+kBlrJZ1UrZTudB0EzUH5IxGeH0xUtMLYcW6TABikbkULAv3KdRAkj9P+iJ3nB3tKulkUP5AlYyX9j+cHn3QdBMlj5I9YeX4wQ9KNkua4zgJg2C6TdD4rAuYX5Y/YeH4wV1HxM7EPyL4rFK0IyK2AOUT5IxaeHyxUtHzoVNdZAMTmD4puBXzZdRDEi2v+KWaMGWeM+bwxZrUxptsY87wx5ofGmOmus/Xn+cExiq7xU/xAvixRdCvgga6DIF6M/FPKGDNWUaEukfSCpNsleZKOkvQXSUustU84C9jg+cHxkn4laRfXWQAk5nlJx1YrpTWugyAejPzT67NqfOqWNMda+25r7WJJFygaYf/QZThJ8vzgXZKuEsUP5N0+km7y/GB/10EQD0b+KWSMGSPpJUkTJR1urb1/m8dXSZov6Qhr7b0OIsrzgw9J+p74AAkUyVOS3lCtlJ52HQQjwy/udFqmqPgf37b4G37Z+P625kV6hecHF0r6vvjfD1A0MyXd7PlBquYdYej45Z1OhzW+37edx7f+fH4TsrxKYwGQLzf7fQGkxgGKLgFMcx0Ew0f5p9OMxvdnt/P41p/PbEKWv/L84AxJX2nmewJIpTmSVjRW80QGUf7ptGvj+6btPL6x8X1CE7JIkjw/KCmaZGia9Z4AUu0gRR8A9nAdBENH+WOnPD9YJulySaNcZwGQKodI+q3nB1NcB8HQUP7ptKHxfXu30I1vfO9KOojnB4dIulrSuKTfC0AmHSbpBs8PJrkOgsGj/NNp6200+27n8a0/fyrJEJ4feIqW7J2c5PsAyLxFkq73/GA310EwOJR/Oq1qfD98O49v/fkDSQVoTOS5QdHiHgCwM0dJ+oXnB62ug2DnKP90ukNSp6RZxpgFAzx+WuP71Um8uecHEyT9RtLsJI4PILeOl/RV1yGwc5R/CllreyR9s/HXbxljtl7jlzHmfEX399+axOp+nh+0SbpS2z/rAAA78g+eH5zrOgR2jOV9U6qxsc8tkhbrlY19Zjb+nsjGPp4ftCia1X9qnMcFUDh9ko6vVkorXAfBwBj5p5S1tlvSGyX9k6L7/d+uqPx/pGi9/yR29LtMFD+AkRsl6XLPD7h0mFKM/CFJ8vzgbEn/7joHgFxZLWlJtVJqdx0Er0b5Q54fLFF0iaHNcRQA+bNC0SWAPtdB8ApO+xdcY3OOX4niB5CMN0n6husQeDXKv8A8PxijqPi5lx9Aks71/OBjrkPgFZR/sX1T0tGuQwAohK95fvBW1yEQ4Zp/QXl+8EFFu/QBQLN0SlpcrZQedR2k6Cj/AvL8YL6kP4jNegA03ypFdwB0uw5SZJz2L5jG0r2Xi+IH4MZhkr7iOkTRUf7F8wNJc1yHAFBoH/H84BTXIYqM0/4F0pht+6+ucwCApHZJC6qV0tM7fSZiR/kXhOcH
CxVd5x/jOgsANPxO0nHVSqnmOkjRcNq/ADw/GK1oTwCKH0CaHCPpEtchiojyL4ZLFG0DDABp8xnPD45yHaJoOO2fc43T/Xcr2mULANJotaSF1Uppk+sgRcHIP8cap/v/QxQ/gHSbI+nLrkMUCeWfb59RdE8tAKTdRzw/ON51iKLgtH9OeX6wQNHp/tGOowDAYL0g6ZBqpbTOdZC8Y+SfQ/1m91P8ALJkmqSvug5RBJR/Pn1anO4HkE0f8PzgGNch8o7T/jnj+cFhkv4oRv0AsusBSYez+E9yGPnniOcHo8TpfgDZN1/SR12HyDPKP18+LWmB6xAAEIPPeX6wt+sQeUX554TnBwcourUPAPJgorj3PzGUf358UazdDyBf3u/5wetdh8gjJvzlgOcHyxTtjgUAeRMqmvzX5zpInjDyzzjPD4ykr7jOAQAJOVTSx1yHyBvKP/veLWmx6xAAkKDPeX4wzXWIPKH8M8zzgzZJX3CdAwASNkGc4YwV5Z9tH5fkuQ4BAE3wXs8PjnMdIi+Y8JdRnh/sIWmNotthAKAI7pe0qFopUVwjxMg/uy4VxQ+gWBZKOtl1iDxg5J9Bnh/MU3T7yyjXWQCgyVYquvWP8hoBRv7Z9CVR/ACKaYEY/Y8YI/+M8fzgjZJucp0DABxaKUb/I8LIP3sqrgMAgGMLJL3dcYZMY+SfIYz6AeCvVklayOh/eBj5Z8vFrgMAQEocJukU1yGyipF/Rnh+cKikB1znAIAUYfQ/TIz8s4NRPwC8GqP/YWLknwGeH8yQ9Li4vQ8AtvWApAWM/oeGkX82nC+KHwAGMl/Sqa5DZA3ln3KeH0yR9CHXOQAgxf7RdYCsofzT7yOSxrsOAQApNt/zg2Ndh8gSyj/FPD8YK+ljrnMAQAac6zpAllD+6XampD1dhwCADDjV8wN+Xw4S5Z9Snh+0SLrAdQ4AyIgxks5yHSIrKP/0OlXSga5DAECGnNMYOGEn+EdKr4+6DgAAGbO/pLe6DpEFlH8KeX5woCRmrgLA0DHxbxAo/3T6oOsAAJBRJc8P9nMdIu0o/5Tx/KBV0Sx/AMDQtUr6sOsQaUf5p8/xkvZxHQIAMuxszw9YEn0HKP/04VYVABiZfSSd5DpEmlH+KeL5wVRJb3OdAwBy4DzXAdKM8k+Xd0sa7ToEAOTAmzw/mOU6RFpR/unyPtcBACAnjKT3uA6RVpR/Snh+cICko13nAIAceafrAGlF+acHo34AiNdhnh/McR0ijSj/9DjddQAAyCFG/wOg/FPA84PDJc1znQMAcuhdrgOkEeWfDnwyBYBkzOfU/2tR/ulwousAAJBjp7kOkDaUv2OeH+wjab7rHACQYye7DpA2lL97x7sOAAA5d6TnB3u7DpEmlL97J7gOAAA5Z8TS6a9C+TvU2HXqb1znAIAC4NR/P5S/W0dLmug6BAAUwJs8P9jFdYi0oPzd4pQ/ADTHWElvcR0iLSh/t5jsBwDNQ/k3UP6ONGaeLnCdAwAK5BjXAdKC8nfneEUzUAEAzXGI5weTXIdIA8rfHa73A0BzGUnLXIdIA8rfAc8PWsUtfgDgwutdB0gDyt+NoyRNdh0CAAqI6/6i/F1Z6joAABTUkZ4fjHUdwjXK341FrgMAQEGNkXSk6xCuUf5uHOE6AAAUWOGv+1P+Teb5wURJB7rOAQAFVvjr/pR/8y0S9/cDgEtLPT8odP8V+j/eEU75A4BbEyUd6jqES5R/8zHZDwDcK/R1f8q/+Rj5A4B7hV7pj/JvIs8PJks6wHUOAIAOcR3AJcq/uRj1A0A6zC7ypL/C/oc7QvkDQDq0SdrfdQhXKP/movwBID3muQ7gCuXfXMz0B4D0oPyRrMZkv5mucwAA/oryR+IKe20JAFKK8kfiPNcBAACvQvkjcZzyB4B02cPzg91dh3CB8m8ez3UAAMBrFHL0T/k3DyN/AEgfyh+J8lwHAAC8BuWPRDHyB4D0ofyRDM8PJkqa5DoHAOA1ZrsO4MKo
kR7AGPPDEbzcWmvPHmmGDGDUDwDptKfrAC4Ya+3IDmBMfQQvt9ba1hEFyADPD06SdKXrHACA17CSRlcrpZrrIM004pG/pDfGcIy881wHAAAMyEjaXdJLroM004jL31p7axxBco7T/gCQXnuoYOXPhL/m8FwHAABs1x6uAzRbHKf9B2SMGSWpJOkoRf+wd1lrf9h4bJ/Gz/5kre1LKkOKTHcdAACwXZR/HIwxx0j6qaT9FF1PsZJGS9p6Z8DRkv5H0jsl/TqJDCkzyXUAAMB2Fa78Yz/tb4w5WNJ1kqZJ+oakdyn6ANDf1ZI2SXpH3O+fUhNdBwAAbFfhyj+Jkf8lksZKOtFae4MkGfPq7rfW9hhj7pO0MIH3TyPKHwDSq3Dln8SEvzdKuntr8e/Ac5L2SeD9U8Xzg9GSxrnOAQDYrsJt65tE+U+S9Mwgnjde0TyAvGPUDwDpxsg/Bi9JOnAQzztIg/uQkHW7uQ4AANghyj8GN0laYIzZ7sp/xphTFH1AuDGB908byh8A0o3yj0FFUo+k5caY84wxe299wBgz2RhzlqQfSNoo6asJvH/acL0fANJtsusAzRZ7+VtrH5H03saxv6loYp+VVJb0sqTvS2qTdLq19sm43z+FxroOAADYoTGuAzRbIsv7WmuXSzpE0X3+j0jqVnQ24AlJ35M031p7VRLvnUKUPwCkW+53l91WYsv7WmufkvSJpI6fIZz2B4B0S6wL04qNfZLHyB8A0q1wI//Eyt8Y02aMeZ8x5jvGmCsbX98xxpxujClSIRbpvxUAssh4frDtMvS5ltTGPm+W9CNF6/tv+w96jqQvGWPOtNYW4Va/wp1OAoAMGiWp13WIZom9mIwxiyVdo2j25F2Sfiap2nh4pqI7AZZIutoYc6y19q64M6RMj+sAQJxaTXfX6H1+da9e+8EeyKz6lj1bol3oiyGJUek/KVq29zxr7fcGePwbxphzJH1X0uclvTWBDGnS7ToAEKeabdt1yvgHJ3e32sNcZwFiVHcdoJmSuOa/WNI92yl+SZK19t8k/VHRGYC82+w6ABAvYxZ2TC7C0twolprrAM2URPnXJa0ZxPPWKFr8J+8Y+SN3Dm2fPt5Yu851DiAmNiyHjPxH6G5J8wfxvPmN5+Yd5Y/c+W3f4umLu7tD1zmAmBRq1C8lU/6XSJptjPmcMeY1xzeRz0ma3Xhu3nHaH7mzys6a/Ym166fI2iKcvUP+Fa78RzzhzxjzgQF+/GNJn5V0hjHmV5Keavx8pqRTJXmK1vifq+iOgDxj5I8cMmZiz4TNk+v1le2trQtdpwFGiPIfhh9p4Gv3RlHJX9Dv8f63Bp0j6cOS/jOGDGlG+SOXbqot3HxOx+9rX9x9iusowEgV7vd0HOX/eRVj4t5wcdofubS8tmyvX6y/cdaXpkz+izVmqus8wAisdR2g2UZc/tbaS2PIkWeF+0SJYrjfHjhnlFXXGzZv/tOtu+xyrOs8wAgUrvzZ2Cd5jPyRS1YtLS9o99UXr+2YJWsLdZsUcofyR+wY+SO3bqkdtmlGX9++U2u1+1xnAUaA8o+LMeYYY8yXjTHLjTErjDE3DfC1Iqn3T4tqpdSnAs4kRTEsry2bKkl/397JyB9ZVrjyT2JjHyPpB5LKemV2v9WrZ/pv/XtRJgp2SZrkOgQQt3vs3LnWav3bN2xc9Pk9prxQN2aa60zAMBSu/JMY+Z8r6UxJ90r6G0m/bvx8rqQTFN0aWJf0ZUkHJPD+afS86wBAEupqaX1Rk1e3Sq1v3rR5tes8wDC97DpAsyVR/mdK2ijpBGvtCkWjXllrH7PWXm+tPUvRtr4XSlqQwPun0bOuAwBJubU2f4MkXbi2fY6s5RIXsoiRfwwOknSntXbrP6aVJGNM69YnWGt/qejMwIUJvH8aUf7IrSvr0XX/abXatGl9tXtc5wGGgfKP6Zj9/yE3Nb5P3uZ5j0k6NIH3TyO2P0Vu3VU/aK612iBJ/9De0bqz5wMpRPnH
4DlJ+/T7+9Z1/bdd/3uOpL4E3j+NGPkjt2pqHfUXTXxUkk7cuOnwUdbyYRdZQ/nH4D5JB/c7zX+Dopn9XzLGzDPGTDDGXCRpkaT7E3j/NKL8kWu31+d3SVKL1HLCho2Pu84DDBHlH4OrJO0hqSRJ1tpVkn4u6TBJD0nqkFRRNOr/TALvn0aMhJBry2vLdt/650+2d7xO1va6zAMMwcawHBZuMbbYy99a+zNJ4yQF/X5clvRpSX+UtEbStZLeZK29O+73TylG/si139cPnmdtNL9naq0+dUZfHxP/kBVPuA7gQiIr/Flrt9h+t/xYa3uttRVr7RJr7Vxr7dustbcn8d5pVK2UOtW45RHIoz6NGr1Wuz269e+fXNcx1mUeYAgKuT4Fa/s3z3OuAwBJuqN+SOfWP79p0+YFo6190mUeYJAofySK6/7IteW1ZX+9nddI5u1dG552mQcYpEd3/pT8GfHa/saYkVwvsdbaWSPNkBFc90eu3VE/ZJ616jZGYyXpY+2dh14+YdduGcMlAKRZIUf+cWzs48VwjCKg/JFrPRrd1q4JK6eoa4EkTa7Xp8zq7b3j8TFjljmOBuxIIct/xKf9rbUtI/mK4z8iIwo5oxTFcmf94I7+f79gXcdujqIAg7E2LIeFu8df4pp/M61yHQBI2pW1ZZP6//31m7sPbavXH3MUB9iZQo76Jcq/mR6SxMInyLXb6vPnWast/X/2zq4NbGmNtKL8kaxqpdQj6U+ucwBJ2qIxYzs0/lWzp8/r6Fwgazdt7zWAQ5Q/mmKl6wBA0u6qH7yu/993q9uJB/X03ucqD7ADhbzNT6L8m60oGxmhwJbXlr5mkt9F69qnuMgC7AQjfzTFStcBgKTdWl8wz1r19P/Zkd1bDh5Xrz/sKhMwACupsJNRKf/mWqnof3BAbm1W2y7rtcsj2/789PVdL7vIA2zHI0XczW8ryr+JGhv8VF3nAJJ2d33eum1/9qGO9QtlLRtcIS3udB3AJcq/+bjuj9y7srZswrY/G2/trvO39Kx0EAcYCOWPplrpOgCQtJvqC+dZq75tf+6vbd/LRR5gAJQ/moqRP3Jvk8aO79K411z3P7SnZ86u9fqDLjIB/axVgW/zkyh/F1a6DgA0wz31uQNO8Duzc31Hk6MA2/pDWA4LPfma8m+yaqX0rCRmPSP3rqot3XWgn5c7uxYZazuaHAfor9Cn/CXK35U/uA4AJO3G+qK51qq27c/HWjvuiO4tD7jIBDRQ/q4DFNTNrgMASduocRM2auyA11U/tbZ932bnARr6JN3tOoRrlL8blD8K4b767L8M9PO5vb0HTKzV2OYaLqwKy2HhN5qi/N1YKek1i6AAeXNVfem47T324Y71G5uZBWgo/Cl/ifJ3olopWUm3uM4BJO362hFzrVV9oMfeu77rCGMtk1/RbJS/KH+XbnIdAEhal8ZP3KS2Aa/7j5HGLN3c/VCzM6HwKH9R/i6tcB0AaIaV9QNf2t5jn1rX7snaQt9vjaZaE5bDp12HSAPK35FqpfSIpGdd5wCSdnX96LHbe2z/3r6Zu9fq9zUzDwrtStcB0oLyd+s3rgMASbuuduRca7e/lfW5HZ29zcyDQrvKdYC0oPzdovyRex2aMGmzxqze3uOndW04osXaF5uZCYW0VtIdrkOkBeXv1m8lMepB7j1gZ/15e4+NkkYdt2nzw83Mg0K6NiyHr1lxsqgof4eqlVKX+CSKArimtqRtR49ftK59tqwd8JZAICac8u+H8nePU//IvWtrR83e0XX/fftq0/eq1e5tZiYUyhZJ17kOkSaUv3vXug4AJG2dJu6+RaPX7Og5H23vbFYcFM/NYTnc4DpEmlD+jlUrpQcl/cl1DiBpod3/hR09ftKGjYtarX2uWXlQKJzy3wblnw7/5ToAkLRraktG7+jxFqnlLRs37fDsADBMlP82KP90+G9p+9dDgTwIaksO3NlzLljXMU/W9jUjDwrjvrAcckZpG5R/ClQr
paqY9Y+ce1mTpm6xox/f0XP2qtX22revdk+zMqEQWNVvAJR/enDqH7n3kJ250xHYx9s7xjQjCwqD8h8A5Z8el4sFf5BzQW3JqJ09560bNy0cZe1TzciD3HswLIerXIdII8o/JaqV0lpxHypyLqgtmbWz5xjJvG3DxiebkQe59wPXAdKK8k8XTv0j1/6sKXv12FE7LfaPr+s4RNb2NCMTcqtH0k9ch0gryj9drpLU5ToEkKSH7YydbmW9e72+h9fbx8Q/jMTysByudR0irSj/FKlWSpslXeE6B5Cka2uLB/V754L2jl2SzoJc45T/DlD+6cOpf+TaNbUlBwzmecdt2rxgjLVPJJ0HufSUpBtdh0gzyj99Vkja7vanQNY9p6nTem3roGbzn9q1YaeXCIAB/EdYDlk4bQco/5SpVko1ST93nQNI0qN2v6cH87yPtnfOl7XdSedBrtQl/dB1iLSj/NPpu2K5X+TYb2pHmcE8b2K9Pml2by9b/WIobgzL4TOuQ6Qd5Z9C1UrpUbHVL3Ls6vrR3mCfe9HajokJRkH+MNFvECj/9PqK6wBAUp62e+3ba1sHdT3/6O7uQ8bW648mnQm58LJYzndQKP+UqlZKN0u633UOIClr7PTqYJ/7nvUbXkwwCvLjJ2E5ZHGoQaD80+1rrgMASbmuduSgn/t3HZ0LZe2GBOMg++qSvuc6RFZQ/un2c0nPuw4BJOGq+tKZg33urtZOOKSnhzNh2JErwnLI5aFBovxTrFop9Ur6huscQBKetNP267Mtg/5we9Ha9j2SzIPM+xfXAbKE8k+/70na6DoEkITH7T6D3r3v8C09B+1Sr/8pyTzIrOvDcnif6xBZQvmnXLVSapf0I9c5gCTcUD+iPpTnf6Cza11SWZBpjPqHiPLPhq8rmswC5MqVtaUzhvL8szrXHy5rO5PKg0y6IyyHt7kOkTWUfwZUK6U1kq52nQOI2xq778yaNYPey2Kctbss3LJlVZKZkDlfcB0giyj/7GDRH+TSk3bakHbu+9TajmlJZUHmrArLYeA6RBZR/hlRrZRul3S36xxA3G6sL+obyvNf19Mze0Kt/kBSeZApjPqHifLPls+4DgDE7crasv2G+pqzOtd3JZEFmfKYpMtdh8gqyj9DqpXSbyXd4DoHEKdH7Iz9a9a8NJTXnLF+/SJjLTP/i+2LYTlkIvQwUf7Z8ymx3S9y5im71+NDeX6b1djF3VvCpPIg9Z6R9J+uQ2QZ5Z8x1UpppaT/dp0DiNNv64f3DvU1n1rbPqTbBJErXwzL4ZD/N4NXUP7Z9FlJW1yHAOJyZW3Z9KG+5sDe3v0n12orE4iDdHtYbOAzYpR/BlUrpaqkb7vOAcTlIbv/rLo1Lw/1ded0dG5OIg9S7YKwHA7pDhG8FuWfXf9XEiudITeesVPXDPU1716/4Qhj7V+SyINUui4sh79xHSIPKP+MqlZKayV90XUOIC4r6ocP+VLWaGn06zd3s9lPMfRJOt91iLyg/LPt65Kecx0CiMOVtWX7DOd1F69tP0DWcstX/n03LIcPuw6RF5R/hlUrpc2S/o/rHEAcVtkDDqxbM+R792f29e03tVZjO9d8axe/62JF+WffjyRx2hM5YMxzdvfHhvPKj3R0MvLPt8+F5ZBFnWJE+WdctVKqSfJd5wDicHN9YfdwXndK18ZFLda+EHcepMIjkr7lOkTeUP45UK2UrpZ0nescwEgtry3beziva5Va37Rp86Nx50EqXMitffGj/PPjPEmbXIcARuJ+e+Bsa4d3C+uF69rnytpa3Jng1PVs2ZsMyj8nGgv/XOo4BjAiVi0tz2v3YY3g9+mrTZtWq90bdyY4w619CaL88+Vrkla5DgGMxK21w4a9at8/rOvgd1p+XBaWQyYzJ4T/o+RItVLqk/RhScx8RmYtry3bc7ivPXHjpsNbrX02zjxw4jFJl7gOkWeUf85UK6U/ipmxyLB77Nw51mr9cF7bIrWcuGHTkLYHRupY
SWeH5ZB9GxJE+efTpyVVXYcAhqOultYXNXnYM/c/2d5+sKxlu9fs+nZYDm93HSLvKP8cqlZKGySdregTNJA5t9YOG/adK1Nr9akz+vruiTMPmqYq1i1pCso/p6qV0k1iz2tk1BX1ZXuM5PWfWNcxNq4saKpzwnK4wXWIIqD88+0iSU+5DgEM1R/r8+Zaq2GXwJs3bV4w2ton48yExH07LIc3xnEgY8wtxhi7g6/j43ifLKP8c6zf6X8gU2pqHfUXTXpkuK83kjm5a8PTcWZColYrGqzE7VeSfjzAV+F3QzXWclk47zw/+I6kc13nAIbiK6O/c+s7Wm8/drivb29pWfeGGdPHy5i2OHMhdjVJy8JyeFdcBzTG3CLpWEn7W2urcR03Txj5F8P5kkLXIYChuKJ2zJSRvH5yvT5lVm8vK/6l37/EWfwYHMq/AKqV0mZJp0nqcp0FGKw/1A+aZ+3I9qu4YF3HrnHlQSLulfR51yGKiPIviGqltFpc/0eG9GnU6LXabdjX/SXp9Zu757fV62viyoRYdUk6PeEd+842xnzbGPNNY8w/GGNmJPhemUL5F0i1Urpc0jdc5wAG63f1Q0Z8tuqdXRuejyMLYndmWA6T3ob5s4p2PP17SZdJWmOMYdlgUf5FdKGku12HAAZjee2YySM9xnkdnYfJWra7TpcvhuXw1wke/zZJZ0iaJWkXSXMlfUbRToGfN8Z8PMH3zgRm+xeQ5wczJN0vaUQTqoCkjVZfz+q2D9SM0biRHOed++z9u0faxhwTVy6MyI2STgjLYa3Zb2yMeYuk6yV1SNrHWlvY/QMY+RdQtVJ6WtGnYj75IdV6NWrMOk0Y0XV/SbpoXTsfdNPhKUnvdVH8kmStvUHSPZImSVrsIkNaUP4FVa2UrpX0Bdc5gJ35ff11w9rhr7+jurccPK5eH/GHCIxIt6RTw3K41nGOxxrfpzlN4RjlX2z/KOlm1yGAHbmitmxiHMd53/quv8RxHAzbeWE5vM91CElb55FsdJrCMa75F5znB3spuv5f6E/BSK829XQ/0namMUYjWqlvozEblszc18qYCXFlw6B9NyyH57kOYYyZKulJSeMl7WetfdZxJGcY+RdctVJ6UdJ7FC2xCaTOFo0Z26FdR3zKfry1u87f0rMyhkgYmt9LatrsemPMUmPM240xrdv83JN0haLiv6rIxS9R/pBUrZRuU3QvLJBKf6gf1BHHcT61rn3POI6DQXtR0mlhOexp4nvOUVTyzxpjAmPMfxljfifpYUnLJD0k6cNNzJNKlD8kSdVK6ftimU2k1PLast3iOM78LT1zd63XH4rjWNipPknvCsthsxdZukvSdyQ9L+lISe+SdIiklZIukHSktfalJmdKHa7541U8P/i+pA+5zgH0N1ZbNj/c9sFWYzRmpMf67qTd7vjW5EnL4siF7bKSPhCWw5+6DoKBMfLHts6VdI3rEEB/3Wobt167xHKr3pmdXYfL2s44joXtOp/iTzfKH69SrZRqkt6t6NQZkBp31w9qj+M4Y60dd0T3llVxHAsD+kJYDr/uOgR2jPLHa1QrpU2S/lbSatdZgK2W15bFtj3vp9a1T4/rWHiVfw/L4addh8DOUf4YULVSelnS8ZL+7DoLIEk31xfMs1axbP86r6d31sRajdF/vK5QdNkQGUD5Y7uqldKTkk5UtO824NQmjR3fpXGxLdF7duf6Qq/wFrNb5HDNfgwd5Y8dqlZK90s6VVKv6yzAPfW5sa0Lf3pn1xHG2pfjOl6B3S/p5LAcbnEdBINH+WOnqpXSbyWdJXYBhGNX1paNj+tYY6QxSzd3PxjX8QpqjaTjw3I44s2X0FyUPwalWin9VNInXedAsf22fvhca+Nbivride37i8VOhusFSW8Jy2HhF8zJIsofg1atlC6T9BFxBgCObNS4CRs19tG4jndAb9/M3ev1++M6XoG0S3prWA6fdB0Ew0P5Y0iqldJ3JJ0pNgKCI/fW58S6Ne+5
7Z3NXHc+D16UdFxYDkPXQTB8lD+GrFop/aek94pJgHDgqtrSXeI83mldG45osfbFOI+ZY09Len1YDh9wHQQjQ/ljWKqV0uWK7gJghi+a6ob6ojnWqh7X8UZJo47dtDm2WwhzbLWi4n/MdRCMHOWPYatWStdIepukTa6zoDi6NH7iJrXFdt1fki5e136grI3tA0UOrVJU/E+7DoJ4UP4YkWqldKOilQBZCAhNs7J+YKwzzPftq03fq1a7N85j5sjvFV3jZ1Z/jlD+GLFqpXS7pDcpmgEMJO7K+tJxcR/z79vZ6G8AKyT9TVgOO1wHQbwof8SiWin9UdIbJcU6ExsYyPW1I2O97i9JJ2/YuKjV2ufiPGbGXSmpFJZDlkHOIcofsalWSqskvUHS866zIN86teukzRoT68SzFqnlLRs3MZkt8lNJp7Fkb35R/ohVtVJ6RNJSSSybikQ9YGfFfnve+es65snaWHYOzLDvSPpAWA6L/u+Qa5Q/YletlJ5S9AHgWtdZkF9X1Y4eE/cx967V9p7eV9iJfzVJF4bl8CNhOWQVz5yj/JGIaqXUJekkSZe5zoJ8+k3tqDnWxr/U9MfbO0bFfcwMaJd0YlgOv+I6CJrDsKcFkub5wd9J+qakIv5SRYIeaSs/Ntb0zo7zmFayh3v7PdNnzIw4j5tiDynakvdx10HQPIz8kbhqpfQ9RWsBcCsgYhXaA16I+5hGMn+7YWNRNqxZLmkJxV88lD+aoloprZB0hCTWBEdsrq4tGZ3EcT+xruN1sjbPG/5YSZ+XdGpYDje4DoPmo/zRNNVK6QlJR0v6hessyIdra4sPTOK4u9fre3i9fXmd+LdB0jvCcvh/mNhXXJQ/mqpaKW2qVkrvkXSx2BYYI/SyJk3dYkcncsr6/PaOWHcPTIknJB0dlsMrXAeBW5Q/nKhWSl+WdIKkda6zINsesjMTWVTqjZs2HzbG2ieSOLYjKyQdGZZD1uAA5Q93GpsCLZB0k+MoyLBrake3JnXsU7o2PJPUsZuoLumLkt4alkM+bEMSt/ohBTw/MJI+KelfJLU5joOM2UvrXrpr7Ef3TOLYnS0tHcfMmD5WxoxN4vhNUFW0Wt/troMgXRj5w7lqpWSrldJXFd0NsMp1HmTLi5qyZ48dlciteRPr9Umze3uzOvHvR5LmU/wYCOWP1KhWSg9KOkrSl6R4d2xDvj1sZzyb1LEvXNuxW1LHTsjLim7h+2BYDrtch0E6Uf5IlWql1FOtlD6laHvgp1znQTYEtSWJ/S5b2t196Nh6fXVSx49ZIOkQZvNjZyh/pFK1UrpN0nxJP3GdBel3TW3JAUke/91dG/6c5PFjsFHSuWE5/NuwHMa+2yHyhwl/SD3PD06T9D1JU1xnQXo91nbGU6NNbWYSx+4yZv3Smfu2ypjxSRx/hP4g6YywHK5xHQTZwcgfqVetlH4p6VBJ17vOgvR61O6X2G15E6zd7XU9Pfcldfxh6pP0j5KOofgxVJQ/MqFaKT1frZSOl3SGpNg3c0H2XVtbbJI8/sVr2/dI8vhDdKukw8Ny+E9hOWSlTAwZp/2ROZ4fTJB0iaRPSEpkYxdkz37mpedub/vE9CTfY/HMff+0qaXl4CTfYyeelXRhWA7ZHwMjwsgfmVOtlLqqldLFii4FXOc6D9LhGbvn9F7bmtgtf5L0/s4uVyvkbVG0CNY8ih9xYOSPzPP84CRJX5OU6IxvpN+1Y/w7Dm55ellSx99kzMbFM/ftkzETk3qPAQSSPsF1fcSJkT8yr1opXSXpYEmflbTJcRw4dF3tqERHM7tYO37hli3NWoVyjaS/bdy+R/EjVoz8kSueH+wn6f9JepfrLGg+z7zwzC1tF+yX5Hs8OGbMY++dvvfsBN9io6JT/F8Jy+GWBN8HBUb5I5c8PzhO0r8qmheAAlnT9v7nR5n6Pkm+x9IZ+4ZdrS1J/G/rF4om9CU6dwHgtD9yqVop3SJpoaQPSnrMbRo00+N2n0Q2+env
rM7162M8nJV0haSFYTl8D8WPZmDkj9zz/KBV0rslfUbR3ADk2CdHXX77x0dd8fok32OLUfeRM/fbbI2ZPILDWEm/kvRPYTl8IKZowKBQ/igMzw9aJL1D0cTA+Y7jICGzzHNPrWi7KJFlfvv70N573nrXuLHHDuOldUmXKyr9h2KOBQwK5Y/C8fzASDpJ0UJBixzHQQIeb3v/C62mPi3J91gzevSTp+w7bf8hvKQu6eeS/jkshw8nFAsYFMofheb5wQmKPgQc7ToL4nPjmAvvmN3yfGL3+2/1hhnT729vbV24k6fVJP1MUek/mnQmYDCY8IdCq1ZKv6lWSkslvVnReunIgRvqR9Sb8T4f7ljfvYOHeyX9WNJBYTk8g+JHmjDyB/rx/OAYSR+TdIrYNyCz5pqnn7y+zR/KKflh6ZV6F3n7dVhjpvb78fOS/k3Sv4XlkE2okEqUPzAAzw/2lnS2pHMkzXAcB8PweNvpL7Uau2fS7/P3e0295bZdxh2n6MzRtyRdEZbDvqTfFxgJyh/YgcZtgiVJ50l6q6REt41FfG4ac8GdB7S8sDTht+l4dMzo7542fdp/heXwwYTfC4gN5Q8MkucHMyWd2fjyXGbBzv3vUf9129+NCt6Q0OFvl/R9Sb/UpZ2bE3oPIDGUPzBEjVsF36ho9cB3SBrnNhEGcrCpPn5t26dnxXjIP0v6iaR/16Wdq2M8LtB0lD8wAp4fTFS0idA7FH0gGOM2Efp7ou30l1uM3WMEh3hJ0Sp8/yPpNl3a2ZS7CICkUf5ATDw/2E3SCZJOlnSipGbu+Y4B3DLmk7/3Wl4c6hoOLytaa/8Xkm7RpZ21+JMBblH+QAI8Pxgt6VhFHwROlpToNrMY2CWjfnLr2aN+M5gleJ+QdKWk5ZLuoPCRd5Q/0ASeHyzUKx8EFrhNUxzzzeOPXdV2yewBHqpLulfS1ZKW69LOsLnJALcof6DJGncNnCzpLYqWFZ7iNlGeWftE2/vbW4ydIulhSSsk3aTodH6722yAO5Q/4FDjzoG5kpb2+5on1hMYKSvpT5J+99+j//nGpa1/ulOXdrLaHtBA+QMp4/nBZElL9MqHgaMk7eo0VPptkfRHSb9rfN1ZrZQY2QPbQfkDKddYZXC+og8CR0s6WNJsFfMDQa+k1YpG9Q/1+/5YtVLqdRkMyBLKH8gozw+mKfoQMKfxfeufZ0ka6zBaHHokPaao2LctedbNB0aI8gdyxvODFkn76tUfCg6UtLekqY2v8c4CSu2SXlS0gM5L/f78oqJV9FYrxSVvjNlF0WTNt0k6RtJMSTVJaxQtCPRVa+0GdwmBnaP8gQLy/GCcpD0UfRDYQ9IkSbsN8LWrosmHNUW3xw30faCf9Ulap9cW/EtZPz1vjPmQonX9pegOggcV/VstlTRB0iOSjrXWvuQmIbBzlD8ADIExpqyo6L9urX2438+nSQokLZT0M2vt+xxFBHaK8geAmBhjjpZ0p6K7D3az1vY4jgQMqMV1AADIkVWN722SdncZBNgRyh8A4nNA43uvojkPQCpR/gAQn483vl9nrd3iNAmwA1zzB4AYGGNOlHSNojsdjrTWrtrJSwBnGPkDwAgZY+ZJ+qmi2yIvoviRdpQ/AIyAMWa6pOskTVa0wM9ljiMBO8VpfwAYJmPMFEm3K9pv4T8knW35pYoMoPwBYBiMMbtKWqFo18VfS3qXtbbmNhUwOJz2B4AhMsa0SbpSUfFfL+m9FD+yhPIHgCEwxrRK+pmk/6XolP+prOSHrBnlOgAAZMxHJZ3S+PPLkr5tjBnoeRdaa19uWipgCCh/ABiayf3+fMp2nyVdqujDAZA6TPgDAKBguOYPAEDBUP4AABQM5Q8AQMFQ/gAAFAzlDwBAwVD+AAAUDOUPAEDBUP4AABQM5Q8AQMFQ/gAAFAzlDwBAwVD+AAAUDOUPAEDBUP4AABQM5Q8AQMFQ/gAAFAzlDwBAwVD+AAAUDOUPAEDBUP4AABQM5Q8AQMFQ
/gAAFAzlDwBAwVD+AAAUDOUPAEDBUP4AABQM5Q8AQMFQ/gAAFAzlDwBAwVD+AAAUDOUPAEDBUP4AABQM5Q8AQMFQ/gAAFAzlDwBAwVD+AAAUDOUPAEDBUP4AABQM5Q8AQMFQ/gAAFAzlDwBAwVD+AAAUDOUPAEDBUP4AABQM5Q8AQMH8f8TljZcbqcRxAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 600x600 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(figsize=(4,4),dpi=150)\n",
    "train['label'].value_counts().sort_index().plot(kind = 'pie')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.2 测试集探索"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2.1 数据信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>api</th>\n",
       "      <th>tid</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>RegOpenKeyExA</td>\n",
       "      <td>2332.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>CopyFileA</td>\n",
       "      <td>2332.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>OpenSCManagerA</td>\n",
       "      <td>2332.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>CreateServiceA</td>\n",
       "      <td>2332.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>RegOpenKeyExA</td>\n",
       "      <td>2468.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   file_id             api     tid  index\n",
       "0        1   RegOpenKeyExA  2332.0    0.0\n",
       "1        1       CopyFileA  2332.0    1.0\n",
       "2        1  OpenSCManagerA  2332.0    2.0\n",
       "3        1  CreateServiceA  2332.0    3.0\n",
       "4        1   RegOpenKeyExA  2468.0    0.0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 39173 entries, 0 to 39172\n",
      "Data columns (total 4 columns):\n",
      "file_id    39173 non-null int64\n",
      "api        39173 non-null object\n",
      "tid        39172 non-null float64\n",
      "index      39172 non-null float64\n",
      "dtypes: float64(2), int64(1), object(1)\n",
      "memory usage: 1.2+ MB\n"
     ]
    }
   ],
   "source": [
    "test.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2.2 缺失值探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "file_id    0\n",
       "api        0\n",
       "tid        1\n",
       "index      1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2.3 数据分布探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "file_id      10\n",
       "api         146\n",
       "tid         125\n",
       "index      5001\n",
       "dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2.4 奇异值探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    39172.000000\n",
       "mean      1729.569284\n",
       "std       1486.018402\n",
       "min          0.000000\n",
       "25%        405.750000\n",
       "50%       1342.000000\n",
       "75%       2876.000000\n",
       "max       5000.000000\n",
       "Name: index, dtype: float64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test['index'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    39172.000000\n",
       "mean      2158.769938\n",
       "std        464.152821\n",
       "min        504.000000\n",
       "25%       2092.000000\n",
       "50%       2224.000000\n",
       "75%       2500.000000\n",
       "max       2920.000000\n",
       "Name: tid, dtype: float64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test['tid'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.3 数据集联合分析"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.3.1 file_id分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_fileids = train['file_id'].unique()\n",
    "test_fileids  = test['file_id'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(set(train_fileids)-set(test_fileids))   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(set(test_fileids)-set(train_fileids))   "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.3.2 API分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_apis = train['api'].unique()\n",
    "test_apis  = test['api'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'CertCreateCertificateContext',\n",
       " 'CertOpenSystemStoreA',\n",
       " 'CoInitializeSecurity',\n",
       " 'CreateServiceA',\n",
       " 'CryptAcquireContextW',\n",
       " 'FindWindowA',\n",
       " 'FindWindowExW',\n",
       " 'GetComputerNameA',\n",
       " 'GetFileVersionInfoSizeW',\n",
       " 'GetFileVersionInfoW',\n",
       " 'IWbemServices_ExecQuery',\n",
       " 'LookupAccountSidW',\n",
       " 'LookupPrivilegeValueW',\n",
       " 'OpenServiceW',\n",
       " 'OutputDebugStringA',\n",
       " 'R',\n",
       " 'SendNotifyMessageW',\n",
       " 'SetStdHandle',\n",
       " 'StartServiceA',\n",
       " 'StartServiceW',\n",
       " 'UnhookWindowsHookEx',\n",
       " 'connect',\n",
       " 'timeGetTime'}"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(test_apis)-set(train_apis)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'CertControlStore',\n",
       " 'CryptAcquireContextA',\n",
       " 'CryptCreateHash',\n",
       " 'CryptExportKey',\n",
       " 'CryptHashData',\n",
       " 'DeviceIoControl',\n",
       " 'DrawTextExA',\n",
       " 'EncryptMessage',\n",
       " 'EnumServicesStatusW',\n",
       " 'FindResourceExA',\n",
       " 'GetAdaptersAddresses',\n",
       " 'GetAddrInfoW',\n",
       " 'GetAsyncKeyState',\n",
       " 'GetBestInterfaceEx',\n",
       " 'GetFileInformationByHandle',\n",
       " 'GetFileVersionInfoExW',\n",
       " 'GetFileVersionInfoSizeExW',\n",
       " 'GetUserNameA',\n",
       " 'GetVolumePathNameW',\n",
       " 'GlobalMemoryStatus',\n",
       " 'HttpOpenRequestA',\n",
       " 'InternetConnectA',\n",
       " 'InternetOpenA',\n",
       " 'IsDebuggerPresent',\n",
       " 'Module32FirstW',\n",
       " 'Module32NextW',\n",
       " 'NtDeleteValueKey',\n",
       " 'NtReadVirtualMemory',\n",
       " 'OpenServiceA',\n",
       " 'ReadProcessMemory',\n",
       " 'RegEnumKeyExA',\n",
       " 'RegEnumValueA',\n",
       " 'RtlAddVectoredContinueHandler',\n",
       " 'RtlAddVectoredExceptionHandler',\n",
       " 'RtlRemoveVectoredExceptionHandler',\n",
       " 'SetFileAttributesW',\n",
       " 'SetFileTime',\n",
       " 'SetWindowsHookExA',\n",
       " 'Thread32First',\n",
       " 'Thread32Next',\n",
       " 'WriteConsoleA',\n",
       " 'bind',\n",
       " 'listen'}"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(train_apis) - set(test_apis)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "tianchi_metadata": {
   "competitions": [],
   "datasets": [],
   "description": "",
   "notebookId": "116127",
   "source": "dsw"
  },
  "toc": {
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": true,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "384px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
